########################################################################
########################################################################
# Replication code for:
#
# @article{CansunarWhoisHighIncome,
# title={Who is High-Income, Anyway?:\\Social Comparison, Subjective Group-Identification, and Preferences over Progressive Taxation},
# author={Cansunar, Asli},
# journal={Journal of Politics},
# year={2020},
# }
#
# Comments, questions: asli.cansunar@politics.ox.ac.uk
########################################################################
########################################################################

rm(list=ls())
options(scipen=999)
setwd("~/Dropbox/_Papers for publication/Who is High Income, Anyway?/_JOP/Final version/Replication materials")
#install.packages("readstata13")
require(readstata13)
#install.packages("dplyr")
require(dplyr)
#install.packages("readr")
library(readr)
#install.packages("githubinstall")
library(githubinstall)
#install.packages("sjPlot")
library(sjPlot)
#install.packages("sjmisc")
library(sjmisc)
library(ggplot2)
#install.packages("lmtest")
#install.packages("sandwich")
#install.packages("multiwayvcov")
library(lmtest)
library(sandwich)
library(multiwayvcov)
#install.packages("mgcv")
library(mgcv)
theme_set(theme_sjplot())

#install.packages("ggmap")
require(ggmap)
#install.packages("maps")
require(maps)
library(itsadug)
#install.packages("dummies")
library(dummies)
library(stargazer)
library(foreign)
library(tidyverse)
#install.packages("readxl")
library("readxl")
#install.packages("gapminder")
library(gapminder)

data <- read.dta("issp2009a.dta")

###Keep countries where respondents provide gross household income - income before taxes and transfers.
data <- subset(data,code=="AU-Australia" | code=="BG-Bulgaria" |code=="CY-Cyprus"|code=="DK-Denmark"|code=="CN-China"|code=="FI-Finland"|code=="GB-GBN-Great Britain and/or United Kingdom"|code=="HU-Hungary"|code=="IS-Iceland"|code=="IL-Israel"|code=="JP-Japan"|code=="KR-South Korea"|code=="NZ-New Zealand"|code=="NO-Norway"|code=="PH-Philippines"|code=="RU-Russia"|code=="SE-Sweden"|code=="TR-Turkey"|code=="TW-Taiwan"|code=="US-United States"|code==" VE-Venezuela"|code=="ZA-South Africa")

###Keep complete cases for GAMs analyses
mydata <- data[complete.cases(data$income,data$college, data$female, data$S1r, data$empl_part, data$unempl, data$retired, data$tunion, data$church, data$married,data$college,data$prog_2,data$ln_dist_ceo, data$ln_dist_worker, data$code,data$age_res, data$self_placement,data$right),]
detach(mydata)
attach(mydata)

###Transform variables
mydata$age_res<-as.numeric(as.character(mydata$age_res))
data$self_placement<-as.numeric(data$self_placement)
mydata$self_placement<-as.numeric(mydata$self_placement)
mydata$ccode<-as.numeric(mydata$code)
mydata$v37<-as.numeric(mydata$v37)
mydata$party <- NA
mydata$party[mydata$party_lr=="Left, center left"] <- "Left"
mydata$party[mydata$party_lr=="Far left etc"] <- "Left"
mydata$party[mydata$party_lr=="Center, liberal"] <- "Center"
mydata$party[mydata$party_lr=="Far right etc"] <- "Right"
mydata$party[mydata$party_lr=="Right, conservative"] <- "Right"



###Figure 1###
gdata <- read_excel("net1.xlsx")
gdata$D<-as.factor("Decile")
gdata$D <- as.numeric(as.vector(gdata$Decile))

setwd("~/Dropbox/_Papers for publication/Who is High Income, Anyway?/_JOP/Final version/Pictures")
pdf("figure1.pdf",   width=6, height=4) 
ggplot(gdata, aes(Decile), ylim(-50:50)) + 
  geom_bar(data = subset(gdata, Type == "Transfers"), 
           aes(y = Value, fill = Type), stat = "identity", position = "dodge") +
  geom_bar(data = subset(gdata, Type == "Contributions"), 
           aes(y = Value, fill = Type), stat = "identity", position = "dodge") + 
  geom_hline(yintercept = 0,colour = "grey90") +
  geom_point(data = subset(gdata, Type == "Net Transfers"), aes( y=Value, group=Type))+
  geom_line(data = subset(gdata, Type == "Net Transfers"), aes( y=Value, group=Type, color="Net transfers"))+ 
  ylab("Percentage of disposible income")  +
  scale_fill_manual(values=c("gray41","grey"),  labels=c("Income taxes","Transfers"))  + 
  scale_color_manual(name = "", values = c("Net transfers" = "black"))+
  theme(legend.title = element_blank())+
  scale_x_continuous(name ="Income decile", 
                     breaks=c(1,2,3,4,5,6,7,8,9,10),labels=c("1","2","3","4","5","6","7","8","9","10"))
dev.off()




####Figure 2####
pdf("figure2.pdf",   width=8, height=5)
par(mfrow=c(1,2))
boxplot(self_placement~income,data=mydata, 
        xlab="Income", ylab="Self Placement",ylim=c(1, 10),xlim=c(1,10),main="Income vs. Self Placement")
plot(data$income,data$ln_dist_ceo, xlab="Income", ylab="Subjective income distance from a CEO",ylim=c(-13,23), main="CEO Perceptions")
abline(h=4.06, col="black", lwd=2.5)
dev.off()


###Semiparametric GAM: Self-placement
mreg1 <- gam(self_placement ~ s(income)+ s(ln_dist_ceo, k=5) + s(ln_dist_worker)+ female+
          church+ college+ tunion +unempl+ empl_part+ retired+ married+ age_res+ right+S1r 
          +s(ccode,bs="re"), data = mydata, method="GCV.Cp")
summary(mreg1)

###Figure 3
pdf("figure3.pdf",   width=7, height=10)
par(mfrow=c(2,1))
plot_smooth(mreg1, view="ln_dist_ceo", 
            rug=FALSE, add=FALSE, col='black', rm.ranef=FALSE, ylim=c(3, 8),xlim=c(-5, 15), ylab="Predicted Values of Self-Placement", xlab="Distance to a CEO",cond=list(income=10,ccode=20), v0=0)
vis.gam(mreg1, plot.type = "contour", color = "heat", 
        main="Predicted values of Self Placement",
        ylab="Perceived Distantance to a CEO's Earnings", xlab="Income decile of the respondent",view=c("income","ln_dist_ceo"),cond=list(ccode=20),contour.col ="black",ylim=c(-5,20),labcex=0.98,cex.axis=1.2,lwd=2,cex.lab=1.3,col.lab="black",font=2,ncol=10,n.grid=200)
dev.off()



###Semiparametric Logistic GAMs: Preferences over progressive taxation
mlog1 <- gam(prog_2~ s(income) + s(self_placement)+s(ln_dist_ceo, k=5)+ female+ S1r+church+ college+ tunion +unempl+ empl_part+ married+ age_res+ right+ s(ccode, bs="re"), 
             data = mydata, method="GCV.Cp",family="binomial")

summary(mlog1)
logit <- function(xb){
  1/(1+exp(-xb))
}

###Figure 4###
pdf("figure4.pdf",   width=7, height=5)
par(mfrow=c(1,2))
plot_smooth(mlog1, view="self_placement", 
            rug=FALSE, add=FALSE, col='black', rm.ranef=FALSE,transform=logit, ylim=c(0.4, 1),xlim=c(1, 10),ylab="Predicted Probability", xlab="Subjective Self-placement", hide.label = TRUE,cond=list(income=10,ccode=20, right=1))
plot_smooth(mlog1, view="ln_dist_ceo", 
            rug=FALSE, add=FALSE, col='black', rm.ranef=FALSE,transform=logit,ylim=c(0.4, 1),xlim=c(-2, 15),ylab="Predicted Probability", xlab="Subjective Distance to a CEO",v0=0,hide.label = TRUE,cond=list(income=10,ccode=20, right=1))
dev.off()



###Figure 5###
pdf("figure5.pdf",   width=7, height=15)
par(mfrow=c(3,1))
vis.gam(mlog1, type="response",plot.type = "contour", color = "heat", 
        main="Predicted Probability of Supporting Progressive Taxation", 
        ylab="Self-placement of the respondent", xlab="Income decile of the respondent",view=c("income","self_placement"),cond=list(ccode=20),contour.col ="black",labcex=0.95,cex.axis=1.3,lwd=2,cex.lab=1.2,col.lab="black",font=2,ncol=10,n.grid=200)
vis.gam(mlog1, type="response", plot.type = "contour", color = "heat", 
             main="Predicted Probability of Supporting Progressive Taxation", 
                ylab="Subjective distance from a CEO", xlab="  Income decile of the respondent",view=c("income","ln_dist_ceo"),cond=list(ccode=20), contour.col ="black",ylim=c(-5,15),labcex=0.98,cex.axis=1.2,lwd=2,cex.lab=1.3,col.lab="black",font=2,ncol=10,n.grid=200)
vis.gam(mlog1, type="response", plot.type = "contour", color = "heat", 
         main="Predicted Probability of Supporting Progressive Taxation", 
         ylab="Subjective distance from a CEO", xlab="Self-placement",view=c("self_placement","ln_dist_ceo"),ylim=c(-5,15),cond=list(ccode=3),contour.col ="black",labcex=0.98,cex.axis=1.3,lwd=2,cex.lab=1.2,col.lab="black",font=2,ncol=10,n.grid=200)
dev.off()
 


#####################################################################################################################

###Appendix 

###Table A1 
stargazer(mydata,mydatastyle="apsr", type = "text",
          out="~/Dropbox/_Papers for publication/Who is High Income, Anyway?/_JOP/Final version/tableA1.tex"
          , title = "Summary Statistics", label = "sum_stats")

###Figure A1 
pdf("figureA1.pdf",   width=8, height=5)
ggplot(mydata,mapping=aes(x= as.factor(income), y=prog_2)) +
   stat_summary(fun.y="mean", geom="bar")+
  xlab("Income")+ 
  ylab("Support for Progressive Taxation")+ 
  facet_wrap(~code) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
dev.off()  

###Figure A2
 pdf("figureA2.pdf",   width=8, height=5)
   ggplot(mydata,mapping=aes(x= as.factor(income), y=self_placement)) +
     geom_boxplot() +
     xlab("Income")+ 
     ylab("Self-placement")+ 
     facet_wrap(~code) +
     theme(axis.text.x = element_text(angle = 45, hjust = 1))
  dev.off()


  
###Figure A3  
pdf("figureA3.pdf",   width=8, height=5)
plot(data$income,data$ln_dist_ceo, xlab="Income decile", ylab="Subjective income distance from a CEO",ylim=c(-13,23), xlim=c(1,10),main="CEO Pay Ratios - Perceptions vs Reality")
abline(h=2.42, col="red", lwd=1.5)
abline(h=2.3, col="purple", lwd=1.5)
abline(h=1.77, col="blue", lwd=1.5)
abline(h=2.26, col="green", lwd=1.5)
abline(h=2.1, col="orange", lwd=1.5)
abline(h=1.82, col="yellow", lwd=1.5)
legend('bottom',
       legend=c("USA", "UK", "Sweden","South Africa","China", "South Korea"),
       col=c("red","purple", "blue", "green","orange", "yellow"), lwd=3,
       bty='n',cex=0.6, horiz = TRUE)
dev.off()

###Figure A4  
pdf("figureA4.pdf",   width=6, height=4) 
boxplot(ln_dist_ceo~income,data=data, 
        xlab="Income", ylab="Perceived distance to a CEO",ylim=c(-8, 24))
dev.off()

###Figure A5  
pdf("figureA5.pdf",   width=6, height=4) 
boxplot(ln_dist_ceo~self_placement,data=data, 
        xlab="Self placement", ylab="Perceived distance to a CEO",ylim=c(-8, 24))
dev.off()

###Figure A6


pdf("figureA6.pdf",   width=6, height=4) 
boxplot(self_placement~party,data=mydata, 
        xlab="Ideology", ylab="Self Placement",ylim=c(1, 10))
dev.off()

###Figure A7

pdf("figureA7.pdf",   width=6, height=4) 
boxplot(ln_dist_ceo~party,data=mydata, 
        xlab="Ideology", ylab="Perceived distance to a CEO",ylim=c(1, 20))
dev.off()

###Table A3
gamtabs(mreg1, caption='Summary of mreg1',  out="~/Dropbox/_Papers for publication/Who is High Income, Anyway?/_JOP/Final version/tableA4.tex")


###Table A4
gamtabs(mlog1, caption='Summary of mlog1',  out="~/Dropbox/_Papers for publication/Who is High Income, Anyway?/_JOP/Final version/tableA3.tex")



## A 3.8 Additional Analysis
mydata$taxes <- NA
mydata$taxes[mydata$v37==1] <- 0
mydata$taxes[mydata$v37==2] <- 0
mydata$taxes[mydata$v37==3] <- 0
mydata$taxes[mydata$v37==4] <-1
mydata$taxes[mydata$v37==5] <- 1
summary(mydata$taxes)

mhigh <- gam(taxes ~ s(income)+ s(ln_dist_ceo,k=5) +s(self_placement) + female+ church+ college+ tunion +unempl+ empl_part+ retired+ married+ age_res+ right+S1r +s(ccode,bs="re"), 
             data = mydata, method="GCV.Cp",family="binomial")

##Figure A8
pdf("figureA8.pdf",   width=7, height=5)
vis.gam(mhigh, type="response",plot.type = "contour", color = "heat", 
        main="Probability of Agreeing Taxes for those with High Incomes are Low", 
        ylab="Self placement of the respondent", xlab="Income decile of the respondent",view=c("income","self_placement"),cond=list(ccode=20),contour.col ="black")
dev.off()

##Figure A9
pdf("figureA9.pdf",   width=7, height=5)
vis.gam(mhigh, type="response",plot.type = "contour", color = "heat", 
        main="Probability of Agreeing Taxes for those with High Incomes are Low", 
        ylab="Subjective distance from a CEO", xlab="Income decile of the respondent",view=c("income","ln_dist_ceo"),cond=list(ccode=20), contour.col ="black",ylim=c(-5,15))
dev.off()

##Figure A10
pdf("figureA10.pdf",   width=7, height=5)
vis.gam(mhigh, type="response",plot.type = "contour", color = "heat", 
        main="Probability of Agreeing Taxes for those with High Incomes are Low", 
        ylab="Subjective distance from a CEO", xlab="Self placement",view=c("self_placement","ln_dist_ceo"),cond=list(ccode=20), contour.col ="black",ylim=c(-5,15))
dev.off()

###Table A7
gamtabs(mhigh, caption='Summary of mhigh',  out="~/Dropbox/_Papers for publication/Who is High Income, Anyway?/_JOP/Final version/tableA7.tex")



